tutorials-and-examples/how-tos/Automation Setup - Configure Azure Machine Learning Compute Cluster and Managed Identity.ipynb (457 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Automation Setup - Configure Azure Machine Learning Compute Cluster and Managed Identity\r\n",
"\r\n",
"__Notebook Version:__ 1.0<br>\r\n",
"__Python Version:__ Python 3.8 - AzureML<br>\r\n",
"__Required Packages:__ No<br>\r\n",
"__Platforms Supported:__ Azure Machine Learning Notebooks\r\n",
" \r\n",
"__Data Source Required:__ No \r\n",
" \r\n",
"### Description\r\n",
"This is the first notebook in a series for setting up the Microsoft Sentinel notebook automation platform based on Azure Machine Learning Pipelines.<br>\r\n",
"This notebook provides step-by-step instructions to create an Azure Machine Learning compute cluster and add a user-assigned managed identity to it.<br>\r\n",
"This AML compute cluster will be used as the computing power for Sentinel notebook automation. It can be used for multiple automated notebooks.<br>\r\n",
"Adding a user-assigned managed identity to the compute cluster enables scheduled notebooks to access the tenant's Azure resources.\r\n",
"\r\n",
"***Please run the cells sequentially to avoid errors. Please do not use \"run all cells\".***<br>\r\n",
"\r\n",
"## Table of Contents\r\n",
"1. Warm-up\r\n",
"2. Authentication to Azure Resources\r\n",
"3. User-assigned managed identity\r\n",
"4. Azure Machine Learning Compute Cluster"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 1. Warm-up"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Azure Machine Learning and Pipeline SDK-specific imports\r\n",
"# azureml\r\n",
"import azureml.core\r\n",
"from azureml.core import Workspace, Experiment\r\n",
"from azureml.core.compute import AmlCompute, ComputeTarget\r\n",
"from azureml.core.datastore import Datastore\r\n",
"\r\n",
"# azure common/core\r\n",
"from azure.common.credentials import get_azure_cli_credentials\r\n",
"from azure.mgmt.resource import ResourceManagementClient\r\n",
"\r\n",
"# Python/ipython\r\n",
"import json\r\n",
"from IPython.display import display, HTML, Markdown\r\n",
"\r\n",
"print(\"SDK version:\", azureml.core.VERSION)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"gather": {
"logged": 1641319923703
}
}
},
{
"cell_type": "code",
"source": [
"# Functions will be used in this notebook\r\n",
"def read_config_values(file_path):\r\n",
"    \"\"\"Load the pre-generated Microsoft Sentinel workspace parameters.\r\n",
"\r\n",
"    Args:\r\n",
"        file_path: Path to the JSON configuration file.\r\n",
"\r\n",
"    Returns:\r\n",
"        A 7-tuple: (tenant_id, subscription_id, resource_group, workspace_id,\r\n",
"        workspace_name, user_alias, user_object_id).\r\n",
"\r\n",
"    Raises:\r\n",
"        OSError: If the file cannot be opened.\r\n",
"        json.JSONDecodeError: If the file does not contain valid JSON.\r\n",
"        KeyError: If required keys are missing; the message names all of them.\r\n",
"    \"\"\"\r\n",
"    required_keys = (\r\n",
"        \"tenant_id\",\r\n",
"        \"subscription_id\",\r\n",
"        \"resource_group\",\r\n",
"        \"workspace_id\",\r\n",
"        \"workspace_name\",\r\n",
"        \"user_alias\",\r\n",
"        \"user_object_id\",\r\n",
"    )\r\n",
"    # An open file object is always truthy, so no extra check is needed before parsing.\r\n",
"    with open(file_path) as json_file:\r\n",
"        json_config = json.load(json_file)\r\n",
"\r\n",
"    # Report every missing key at once instead of failing on the first one.\r\n",
"    missing_keys = [key for key in required_keys if key not in json_config]\r\n",
"    if missing_keys:\r\n",
"        raise KeyError(\"Missing key(s) in {0}: {1}\".format(file_path, \", \".join(missing_keys)))\r\n",
"    return tuple(json_config[key] for key in required_keys)\r\n",
"\r\n",
"def has_valid_token():\r\n",
"    \"\"\"Check whether the Azure CLI session can supply an AAD token.\r\n",
"\r\n",
"    Prints or displays guidance for the user when the check fails.\r\n",
"\r\n",
"    Returns:\r\n",
"        True if the token lookup completed without error, otherwise False.\r\n",
"\r\n",
"    Raises:\r\n",
"        KeyboardInterrupt: Re-raised so that interrupting the cell is not\r\n",
"            reported as a sign-in failure.\r\n",
"    \"\"\"\r\n",
"    try:\r\n",
"        # NOTE(review): _get_cred and _token_retriever are private members of the\r\n",
"        # credential object and may change between package versions.\r\n",
"        credentials, _ = get_azure_cli_credentials()\r\n",
"        creds = credentials._get_cred(resource=None)\r\n",
"        # Only the lookup matters; any exception raised here is treated as 'no valid token'.\r\n",
"        creds._token_retriever()[2]\r\n",
"        print(\"Successfully signed in.\")\r\n",
"        return True\r\n",
"    except KeyboardInterrupt:\r\n",
"        # Let the user abort the cell instead of falling through to the login step.\r\n",
"        raise\r\n",
"    except Exception as ex:\r\n",
"        error_text = str(ex)\r\n",
"        if \"Please run 'az login' to setup account\" in error_text:\r\n",
"            print(\"Please sign in first.\")\r\n",
"        elif \"AADSTS70043: The refresh token has expired\" in error_text:\r\n",
"            message = \"**The refresh token has expired. <br> Please continue your login process. Then: <br> 1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking 'Compute' on left menu, then select the instance, clicking 'Restart'; <br> 2. Otherwise, you may just restart the kernel from top menu. <br> Finally, close and re-load the notebook, then re-run cells one by one from the top.**\"\r\n",
"            display(Markdown(message))\r\n",
"        elif \"[Errno 2] No such file or directory: '/home/azureuser/.azure/azureProfile.json'\" in error_text:\r\n",
"            print(\"Please sign in.\")\r\n",
"        else:\r\n",
"            print(error_text)\r\n",
"        return False\r\n",
"    except BaseException:\r\n",
"        # Remaining non-Exception errors (for example SystemExit) are still reported as a failed check.\r\n",
"        print(\"Please restart the kernel, and run 'az login'.\")\r\n",
"        return False"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641319929197
}
}
},
{
"cell_type": "code",
"source": [
"# Populate the Microsoft Sentinel workspace parameters by calling read_config_values\r\n",
"# config.json is generated by the system; you may edit its values or set these variables manually instead\r\n",
"(\r\n",
"    tenant_id,\r\n",
"    subscription_id,\r\n",
"    resource_group,\r\n",
"    workspace_id,\r\n",
"    workspace_name,\r\n",
"    user_alias,\r\n",
"    user_object_id,\r\n",
") = read_config_values('config.json')\r\n",
"print(\"Subscription Id: \" + subscription_id)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641335902117
}
}
},
{
"cell_type": "markdown",
"source": [
"## 2. Authentication to Azure Resources"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# The Azure CLI device-code flow is used to sign in: copy the printed code and open the device login site.\r\n",
"# The command passes --tenant $tenant_id, so the sign-in targets the tenant read from config.json.\r\n",
"if not has_valid_token():\r\n",
"    # ANSI escape for a green background, emitted before the login prompt\r\n",
"    !echo -e '\\e[42m'\r\n",
"    !az login --tenant $tenant_id --use-device-code"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641319938155
}
}
},
{
"cell_type": "markdown",
"source": [
"## 3. User-assigned Managed Identity "
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# 1. Please enter the name of the Azure resource group in which you want to create a user-assigned managed identity\r\n",
"resource_group = 'myresourcegroup'"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641335957503
}
}
},
{
"cell_type": "code",
"source": [
"# 2. Please enter name for an existing user assigned managed identity or for creating a new user assigned managed identity\r\n",
"user_assigned_managed_identity = 'myuai2022'"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641335960633
}
}
},
{
"cell_type": "code",
"source": [
"# 3. Create a new user assigned managed identity if it doesn't exist\r\n",
"# List only the identity names, one per line, so the lookup is an exact match rather than a substring match.\r\n",
"id_list = !az identity list --subscription $subscription_id -g $resource_group --query \"[].name\" -o tsv\r\n",
"\r\n",
"# Compare case-insensitively (Azure resource names are not case-sensitive).\r\n",
"existing_names = [name.strip().lower() for name in id_list]\r\n",
"if user_assigned_managed_identity.lower() in existing_names:\r\n",
"    print('Found existing user-assigned managed identity.')\r\n",
"else:\r\n",
"    print('Create a new user-assigned managed identity.')\r\n",
"    !az identity create --subscription $subscription_id -g $resource_group -n $user_assigned_managed_identity"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641335969833
}
}
},
{
"cell_type": "code",
"source": [
"# 4. Assign the user assigned managed identity a contributor access to the target resource: resource_group, as default\r\n",
"# NEED TO RUN ONLY ONCE\r\n",
"# '-o tsv' returns the bare values, so no JSON quotes have to be stripped from the output.\r\n",
"principal_id_raw = !az identity show --subscription $subscription_id -g $resource_group --name $user_assigned_managed_identity --query principalId -o tsv\r\n",
"uami_id_raw = !az identity show --subscription $subscription_id -g $resource_group --name $user_assigned_managed_identity --query id -o tsv\r\n",
"principal_id = principal_id_raw[0].strip()\r\n",
"uami_id = uami_id_raw[0].strip()\r\n",
"print(uami_id)\r\n",
"\r\n",
"target_resource_id = '/subscriptions/{0}/resourceGroups/{1}'.format(subscription_id, resource_group)\r\n",
"# Pass the object id with an explicit principal type: a just-created identity may not yet be\r\n",
"# resolvable through the directory lookup that plain --assignee performs.\r\n",
"!az role assignment create --assignee-object-id $principal_id --assignee-principal-type ServicePrincipal --role 'Contributor' --scope $target_resource_id"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641337288050
}
}
},
{
"cell_type": "markdown",
"source": [
"## 4. Azure Machine Learning Compute Cluster"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# 1. Please enter name for an existing compute cluster or for creating a new compute cluster\r\n",
"amlcompute_cluster_name = 'compcl2022'"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641336983503
}
}
},
{
"cell_type": "code",
"source": [
"# 2. Look up the Azure Machine Learning workspace in which the compute cluster is checked for and created\r\n",
"# Set this to the name of your current AML workspace\r\n",
"current_aml_workspace_name = 'auto2022'\r\n",
"ws = Workspace.get(\r\n",
"    name=current_aml_workspace_name,\r\n",
"    subscription_id=subscription_id,\r\n",
"    resource_group=resource_group,\r\n",
")\r\n",
"print(ws)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641336925324
}
}
},
{
"cell_type": "code",
"source": [
"# 3. Check if this compute cluster already exists in the workspace. If not, a new one will be created.\r\n",
"# The user assigned managed identity is attached only when a new compute cluster is created here.\r\n",
"cts = ws.compute_targets\r\n",
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\r\n",
"    print('Found existing compute target.')\r\n",
"    compute_target = cts[amlcompute_cluster_name]\r\n",
"else:\r\n",
"    print('Creating a new compute target...')\r\n",
"    # For GPU, use vm_size=\"STANDARD_NC6\"; vm_priority='lowpriority' is optional.\r\n",
"    provisioning_config = AmlCompute.provisioning_configuration(\r\n",
"        vm_size=\"STANDARD_D2_V2\",\r\n",
"        max_nodes=4,\r\n",
"        identity_type=\"UserAssigned\",\r\n",
"        identity_id=[uami_id],\r\n",
"    )\r\n",
"\r\n",
"    # Create the cluster. For a more detailed view of current AmlCompute status, use get_status().\r\n",
"    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\r\n",
"    # No min_nodes is configured above, so do not wait for a node count larger than the cluster's\r\n",
"    # own minimum; min_node_count=None waits for provisioning to finish instead.\r\n",
"    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=10)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1641337998748
}
}
}
],
"metadata": {
"kernelspec": {
"name": "python3-azureml",
"language": "python",
"display_name": "Python 3.6 - AzureML"
},
"language_info": {
"name": "python",
"version": "3.6.9",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"kernel_info": {
"name": "python3-azureml"
},
"microsoft": {
"host": {
"AzureML": {
"notebookHasBeenCompleted": true
}
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}